
* ---------------------------------------------------------------------------
* Sets up the data used to look at the impact of home detention (relative to
* imprisonment) on reoffending.
* The analysis sample of index offences draws on data up to and including
* 2007, because home detention was not used much from 2008-2013.
* ---------------------------------------------------------------------------
capture log close
capture clear all
set matsize 10000

cd "C:\Users\EM"

* Step 1: read in the base data set and derive defendant covariates
use EM_BOCSAR.dta, clear
generate aborig = (indig == 1)
* nolegrep is 1 when legrep is 0, 0 when legrep is 1 or 2, and missing otherwise
generate nolegrep = 1 if legrep == 0
replace nolegrep = 0 if inlist(legrep, 1, 2)
generate age2 = age * age

* Retain only records with gender below 2 (a missing gender is removed too,
* because missing values compare as larger than any number)
drop if gender >= 2

* Calendar year and month of the index date
format indexdate %td
generate year = year(indexdate)
generate month = month(indexdate)
 * Categorise the index offence from its ANZSOC code (index_poanzsoc).
* Each indicator is 1 inside its code range and missing elsewhere.
generate injury = 1 if index_poanzsoc < 300
generate neg_driving_acts = 1 if index_poanzsoc > 399 & index_poanzsoc < 500
generate break_enter = 1 if index_poanzsoc > 699 & index_poanzsoc < 800
generate theft = 1 if index_poanzsoc > 799 & index_poanzsoc < 900
generate fraud = 1 if index_poanzsoc > 899 & index_poanzsoc < 1000
generate pub_order = 1 if index_poanzsoc > 1199 & index_poanzsoc < 1400
generate traffic = 1 if index_poanzsoc > 1399 & index_poanzsoc < 1500
generate govt_proc = 1 if (index_poanzsoc > 999 & index_poanzsoc < 1100) | (index_poanzsoc > 1499 & index_poanzsoc < 1700)

* Single categorical version: codes 1-8 follow the order of the indicators above
generate jcrime_type = .
local code = 0
foreach flag in injury neg_driving_acts break_enter theft fraud pub_order traffic govt_proc {
    local ++code
    replace jcrime_type = `code' if `flag' == 1
}

label define jcrime_type 1 "injury" 2 "negligent driving" 3 "break & enter" 4 "theft" 5 "fraud" 6 "public order" 7 "traffic" 8 "govt procedures"
label values jcrime_type jcrime_type

* Numeric court identifier: destring CourtLocationID after ignoring every
* character that appears in the ignore() string
destring CourtLocationID, generate(courtid) ignore("BCSR_LC_, BCSR_OLD_, LOC")

* Home detention is only an option for prison sentences of 18 months or less,
* so flag the relevant prison sentences and drop index appearances whose
* prison sentence is not flagged
generate pris_sent_18 = (index_ppencode == 1 & index_ptotval <= 18)
drop if index_ppencode == 1 & pris_sent_18 == 0 & indexapr_flag == "Yes"

* BOCSAR coding of home detention and prison has some problems, so rebuild both
* indicators from the penalty code and cross-tabulate them against the originals
generate jindex_pris = (index_ppencode == 1)
generate jindex_hd = (index_ppencode == 3)
tabulate index_pris jindex_pris
tabulate index_hd jindex_hd

* Sorted by person identifier so the file can be used in the merge on mspdi
sort mspdi
save temp1, replace

 * ---------------------------------------------------------------------------
* Read in the referrals and construct the referral variable
* ---------------------------------------------------------------------------
import delimited "C:\Users\referrals.csv", clear

* Remove repeated referral records, parole entries and rows without a person id
duplicates drop start_date last_name first_name birth_date mspdi, force
drop if court_id == "PAROLE"
drop if mspdi == .

* Referral date parsed from the day-month-year text in court_event_date
generate ref_date = date(court_event_date, "DMY")
format ref_date %td
keep mspdi ref_date

* Old-syntax match merge on mspdi (both files must be sorted by mspdi).
* NOTE(review): nothing here establishes that either file is unique on mspdi;
* if both contain repeats, records are paired in row order within person -
* confirm this is intended.
sort mspdi
merge mspdi using temp1

rename PrimaryIndexDate primaryindexdate

* A referral counts when its date lies strictly between the offence date and
* the index date; rows without a referral date are set to 0
generate referral = (index_pooffdate < ref_date & ref_date < indexdate)
replace referral = 0 if ref_date == .

* ---------------------------------------------------------------------------
* Construct judge tendency to use EM. Intended sample:
*   - time period 2000-2007 (only the upper bound, year < 2008, is applied here)
*   - offences eligible for HD
*   - offences punished with HD or prison
*   - courts with more than 1 judge
*   - judges who hear at least 10 eligible cases
* ---------------------------------------------------------------------------
keep if year < 2008

* Flag and remove index offences whose ANZSOC code is on the exclusion list
generate drop_charge = ( inlist(index_poanzsoc, 111, 121, 131, 211, 1099) ///
    | inrange(index_poanzsoc, 311, 329)   ///
    | inrange(index_poanzsoc, 511, 532)   ///
    | inrange(index_poanzsoc, 611, 621)   ///
    | inrange(index_poanzsoc, 1011, 1021) ///
    | inrange(index_poanzsoc, 1111, 1129) )
drop if drop_charge == 1

* Penalty codes above 17 are removed; a missing code is removed as well because
* missing compares as larger than any number.
* NOTE(review): the earlier comment on this line said "18 is fine", yet this
* test removes code 18 - confirm which is intended.
drop if index_ppencode > 17

* Remaining excluded penalty codes:
*   10      community service order (4 turn up in the sample)
*   2, 9    juvenile punishments
*   4       periodic detention
*   12, 13  bond supervised; bond unsupervised
*   6, 7    suspended sentence
drop if inlist(index_ppencode, 2, 4, 6, 7, 9, 10, 12, 13)

* Eligible cases: prison with total term of 18 or less, or penalty code 3.
* The middle clause (codes 6-7) can no longer match because those codes were
* removed just above; it is retained unchanged.
generate elig = ((index_ptotval <= 18 & index_ppencode == 1) | (index_ppenval <= 18 & index_ppencode > 5 & index_ppencode < 8) | index_ppencode == 3)
keep if elig == 1

* Clean up the judge variable: unknown or blank officer codes become ".",
* which destring converts to a missing judge_code
replace JudicialOfficerCode = "." if inlist(JudicialOfficerCode, "UNKNOWN", "")
tabulate JudicialOfficerCode, missing
destring JudicialOfficerCode, generate(judge_code) ignore("J" "M" "B" "C" "L" " ")
drop if judge_code == .

* Drop cases outside of Sydney (remotenessarea of 1 or more) and Aboriginal defendants
keep if remotenessarea < 1
keep if aborig == 0

* The merge indicator from the referral merge is no longer needed
drop _merge

* Keep judges who hear at least 10 cases (judges with fewer than 10 are dropped).
* x1 numbers the cases within judge; xx1 is the largest x1, i.e. the judge's case count.
bysort judge_code: generate x1 = _n
tabulate x1
by judge_code: egen xx1 = max(x1)
keep if xx1 >= 10
save temp2, replace

* Make sure there is more than 1 judge at each court
use temp2, clear

* Count the number of judges per court location id and year:
* first reduce to one row per court / year / judge, then count rows per court / year
egen x = count(1), by(CourtLocationID year judge_code)
sort CourtLocationID year judge_code
collapse x, by(CourtLocationID year judge_code)
egen numjudge = count(1), by(CourtLocationID year)
tabulate numjudge year
save tempx, replace

* Attach the judge counts to the case-level file (old-syntax match merge, so
* both files are ordered by the three keys) and drop court-years with 1 judge
use temp2, clear
sort CourtLocationID year judge_code
merge CourtLocationID year judge_code using tempx
drop if numjudge == 1
 
 * Dropping single-judge courts changes the total number of cases a judge has
* seen, so check that each judge still has at least 10 cases.
rename (x1 xx1) (x1_old xx1_old)
bysort judge_code: generate x1 = _n
by judge_code: egen xx1 = max(x1)
drop if xx1 < 10
drop x1 xx1

* Recompute the within-judge counter and case total on the final sample
bysort judge_code: generate x1 = _n
tabulate x1
by judge_code: egen xx1 = max(x1)
tabulate xx1
* All judges have 10 or more relevant cases

* ---------------------------------------------------------------------------
* Use the referral data here: from this point jindex_hd marks referrals to EM,
* and the sentence-based EM indicator is kept as jindex_hd_old.
* ---------------------------------------------------------------------------
rename jindex_hd jindex_hd_old
generate jindex_hd = (referral == 1)
save temp3, replace
 
* We now have judges who have heard at least 10 cases, in courts with more than 1 judge
use temp3, clear
sort judge_code

* Leave-one-out averages of each judge's probability of giving HD:
* the judge's total minus the current case, divided by the judge's other cases
by judge_code: egen sum_hd1 = sum(jindex_hd)
generate meano_hd1 = (sum_hd1 - jindex_hd) / (xx1 - 1)

* Same idea for index_ppenval among the judge's HD cases; defined only for HD
* cases of judges with more than one HD case
by judge_code: egen sum_hd1s = sum(index_ppenval) if jindex_hd == 1
by judge_code: egen sum_hd = sum(jindex_hd)
generate meano_hd1s = (sum_hd1s - index_ppenval) / (sum_hd - 1) if jindex_hd == 1 & sum_hd > 1
replace meano_hd1s = . if jindex_hd == 1 & sum_hd <= 1

* To ensure the probabilities add up to 1, prison becomes the complement of HD;
* the penalty-code based prison flag is kept as jindex_pris_old
rename jindex_pris jindex_pris_old
generate jindex_pris = 1 - jindex_hd

* Sentence length: index_ppenval for HD cases, index_ptotval for prison cases,
* then the judge's leave-one-out average of it
generate sentencel = cond(jindex_hd == 1, index_ppenval, index_ptotval)
by judge_code: egen sum_sentlen = sum(sentencel)
generate sentlen = (sum_sentlen - sentencel) / (xx1 - 1)

 * Construct the judge proclivity net of court-by-year fixed effects
sort judge_code

* HD proclivity: residual of the leave-one-out HD rate on court x year cells
regress meano_hd1 i.courtid#i.year
predict resid_hd, r
summarize resid_hd
summarize meano_hd1
* hd_mean holds the sample mean of meano_hd1 (from the summarize just above)
generate hd_mean = r(mean)

* Sentence-length proclivity: same residualisation for sentlen
regress sentlen i.courtid#i.year
predict resid_SL, r
* r(mean) has to come from a summarize of resid_SL itself. Without this
* summarize, mean_resid_SL would be filled from whatever r(mean) was left over
* by an earlier command (or be missing).
summarize resid_SL
generate mean_resid_SL = r(mean)
summarize sentlen
return list
* SL_mean holds the sample mean of sentlen (from the summarize just above)
generate SL_mean = r(mean)

* punish: 0 for prison and 1 for home detention, using the penalty-code based
* indicators (the _old versions), not the referral-based ones
generate punish = 0 if jindex_pris_old == 1
replace punish = 1 if jindex_hd_old == 1

* Top-code the prior count at 10. The explicit non-missing test matters:
* a missing value compares as larger than 10 and would otherwise be set to 10.
generate nprior5_courtj = nprior5_court
replace nprior5_courtj = 10 if nprior5_court >= 10 & nprior5_court < .

* Drop the merge indicator and every helper variable whose name starts with x
drop _merge x*
sort mspdi
save EM.dta, replace

* This data set only goes up to and including 2007, as it is used to construct
* judges' tendency to refer to EM. The index-offence file keeps index
* appearances only.
keep if indexapr_flag == "Yes" & year < 2008
save EM_index.dta, replace
* Merge the index offence onto the full offence data set so that ALL subsequent
* reoffending is captured, and construct the time since the index case was
* finalised (final_free).
* The stray double quote after the file name has been removed; with it the
* use command could not be parsed.
use EM_BOCSAR.dta, clear

* Old-syntax match merge on mspdi; only people present in both files are kept
sort mspdi
merge mspdi using EM_index.dta
keep if _merge == 3
sort mspdi index_pooffdate

* flag: non-index appearance whose offence date precedes PrimaryIndexDate;
* flag1 counts these per person, flagx marks a person's first row when it is flagged
generate flag = (index_pooffdate < PrimaryIndexDate) & indexapr_flag == "No"
by mspdi: egen flag1 = sum(flag)
by mspdi: generate flagx = (flag == 1 & _n == 1)
tabulate flagx

* final_free: days from PrimaryIndexDate to indexdate, for non-index
* appearances whose offence date is after PrimaryIndexDate
generate final_free = indexdate - PrimaryIndexDate if indexapr_flag == "No" & (index_pooffdate > PrimaryIndexDate)

* cens: the latest indexdate in the data
egen cens = max(indexdate)
format cens %td
   	
  *** Select offences committed after the index case is finalized.
  *** b is the gap in days between indexdate and PrimaryIndexDate, defined only
  *** for rows whose offence date is after PrimaryIndexDate.
 by mspdi: gen b=indexdate-PrimaryIndexDate if (index_pooffdate>PrimaryIndexDate)
  *** Reoffending measures for 20 successive half-year steps (366/2 days each).
  *** For step i:
  ***   aa        = 1 when final_free is below i half-years (0 when final_free is missing)
  ***   bb        = 1 when final_free is above i-1 half-years
  ***   x         = 1 when both hold, i.e. final_free lies inside the i-th window
  ***   numberoff = per-person sum of index_concurr over rows with aa equal to 1
  ***   reoffx    = per-person count of rows inside the i-th window
  ***   reoff     = 1 when reoffx is positive
  *** Rows with aa equal to 0 end up with numberoff and reoff set to 0.
  forvalues i=1/20 {
  gen aa`i'=(final_free<`i'*366/2)
  gen bb`i'=(final_free>(`i'-1)*366/2)
  by mspdi: egen numberoff`i'=sum(index_concurr) if aa`i'>0
  gen x`i'=(aa`i'>0)*(bb`i'>0)
  by mspdi: egen reoffx`i'=sum(x`i') if aa`i'>0
  by mspdi: gen reoff`i'=(reoffx`i'>0)
  replace reoff`i'=. if reoffx`i'==.
  replace numberoff`i'=0 if numberoff`i'==. & aa`i'==0
  replace reoff`i'=0 if reoff`i'==. & aa`i'==0
    }
** This is for offenses that occur after the first 18 months (Table 5).
** NOTE(review): for each i these statements use the same expressions as the
** reoffx / reoff statements in the loop above, and nothing in them excludes
** the first 18 months - confirm that the reoff18 variables are meant to
** duplicate reoff for steps 3 to 20.
	 forvalues i=3/20 {
  by mspdi: egen reoff18x`i'=sum(x`i') if aa`i'>0
  by mspdi: gen reoff18`i'=(reoff18x`i'>0)
  replace reoff18`i'=. if reoff18x`i'==.
   replace reoff18`i'=0 if reoff18`i'==. & aa`i'==0
     }
 ***** NEW offenses are defined by b>0.
 ***** Keep those rows, then keep one row per person: the one with the earliest
 ***** indexdate. Only the person id and the reoffending measures are saved.
keep if b>0 & b~=.
sort mspdi indexdate
bysort mspdi (indexdate) : keep if _n == 1
 keep mspdi numberoff*  reoff* cens final_free
 sort mspdi 
 save numoff.dat, replace
 
 * Attach the reoffending measures to the index-offence file
use EM_index.dta, clear
sort mspdi
merge mspdi using numoff.dat

* Index cases with no match in numoff.dat (_merge equal to 1) have no recorded
* new offence, so their counts and indicators are set to 0
forvalues i = 1/20 {
    replace numberoff`i' = 0 if numberoff`i' == . & _merge == 1 & indexapr_flag == "Yes" & year < 2008
    replace reoff`i' = 0 if reoff`i' == . & _merge == 1 & indexapr_flag == "Yes" & year < 2008
}
forvalues i = 3/20 {
    replace reoff18`i' = 0 if reoff18`i' == . & _merge == 1 & indexapr_flag == "Yes" & year < 2008
}

* died: days from the index date to the date of death (missing when dod is
* missing, since arithmetic on a missing value yields missing); died_dum flags
* a recorded date of death
generate died = dod - primaryindexdate
generate died_dum = (dod ~= .)

* censs: the latest cens value in the data, shown as a date
egen censs = max(cens)
format censs %td
sort mspdi

* cen: days from the index date to censs. It is a day count, not a calendar
* date, so it is left in the default numeric format rather than a date format.
generate cen = censs - primaryindexdate

* People with no recorded new offence get a follow-up time that runs from the
* index date to 30 Sep 2016.
* NOTE(review): this fixed date is separate from censs computed above -
* confirm that the two are meant to differ.
generate noreoff = (final_free == .)
replace final_free = td(30sep2016) - primaryindexdate if noreoff == 1 & indexapr_flag == "Yes" & year < 2008
save EM_Reoff.dta, replace
